import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
Tz=pd.read_csv('Train .csv')
Tz
| ID | country | age_group | travel_with | total_female | total_male | purpose | main_activity | info_source | tour_arrangement | ... | package_transport_tz | package_sightseeing | package_guided_tour | package_insurance | night_mainland | night_zanzibar | payment_mode | first_trip_tz | most_impressing | total_cost | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | tour_0 | SWIZERLAND | 45-64 | Friends/Relatives | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Friends, relatives | Independent | ... | No | No | No | No | 13.0 | 0.0 | Cash | No | Friendly People | 674602.5 |
| 1 | tour_10 | UNITED KINGDOM | 25-44 | NaN | 1.0 | 0.0 | Leisure and Holidays | Cultural tourism | others | Independent | ... | No | No | No | No | 14.0 | 7.0 | Cash | Yes | Wonderful Country, Landscape, Nature | 3214906.5 |
| 2 | tour_1000 | UNITED KINGDOM | 25-44 | Alone | 0.0 | 1.0 | Visiting Friends and Relatives | Cultural tourism | Friends, relatives | Independent | ... | No | No | No | No | 1.0 | 31.0 | Cash | No | Excellent Experience | 3315000.0 |
| 3 | tour_1002 | UNITED KINGDOM | 25-44 | Spouse | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Package Tour | ... | Yes | Yes | Yes | No | 11.0 | 0.0 | Cash | Yes | Friendly People | 7790250.0 |
| 4 | tour_1004 | CHINA | 1-24 | NaN | 1.0 | 0.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Independent | ... | No | No | No | No | 7.0 | 4.0 | Cash | Yes | No comments | 1657500.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4804 | tour_993 | UAE | 45-64 | Alone | 0.0 | 1.0 | Business | Hunting tourism | Friends, relatives | Independent | ... | No | No | No | No | 2.0 | 0.0 | Credit Card | No | No comments | 3315000.0 |
| 4805 | tour_994 | UNITED STATES OF AMERICA | 25-44 | Spouse | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Package Tour | ... | Yes | Yes | Yes | Yes | 11.0 | 0.0 | Cash | Yes | Friendly People | 10690875.0 |
| 4806 | tour_995 | NETHERLANDS | 1-24 | NaN | 1.0 | 0.0 | Leisure and Holidays | Wildlife tourism | others | Independent | ... | No | No | No | No | 3.0 | 7.0 | Cash | Yes | Good service | 2246636.7 |
| 4807 | tour_997 | SOUTH AFRICA | 25-44 | Friends/Relatives | 1.0 | 1.0 | Business | Beach tourism | Travel, agent, tour operator | Independent | ... | No | No | No | No | 5.0 | 0.0 | Credit Card | No | Friendly People | 1160250.0 |
| 4808 | tour_999 | UNITED KINGDOM | 25-44 | Spouse | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Package Tour | ... | Yes | Yes | Yes | No | 4.0 | 7.0 | Cash | Yes | Friendly People | 13260000.0 |
4809 rows × 23 columns
Tz.head(20)
| ID | country | age_group | travel_with | total_female | total_male | purpose | main_activity | info_source | tour_arrangement | ... | package_transport_tz | package_sightseeing | package_guided_tour | package_insurance | night_mainland | night_zanzibar | payment_mode | first_trip_tz | most_impressing | total_cost | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | tour_0 | SWIZERLAND | 45-64 | Friends/Relatives | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Friends, relatives | Independent | ... | No | No | No | No | 13.0 | 0.0 | Cash | No | Friendly People | 674602.5 |
| 1 | tour_10 | UNITED KINGDOM | 25-44 | NaN | 1.0 | 0.0 | Leisure and Holidays | Cultural tourism | others | Independent | ... | No | No | No | No | 14.0 | 7.0 | Cash | Yes | Wonderful Country, Landscape, Nature | 3214906.5 |
| 2 | tour_1000 | UNITED KINGDOM | 25-44 | Alone | 0.0 | 1.0 | Visiting Friends and Relatives | Cultural tourism | Friends, relatives | Independent | ... | No | No | No | No | 1.0 | 31.0 | Cash | No | Excellent Experience | 3315000.0 |
| 3 | tour_1002 | UNITED KINGDOM | 25-44 | Spouse | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Package Tour | ... | Yes | Yes | Yes | No | 11.0 | 0.0 | Cash | Yes | Friendly People | 7790250.0 |
| 4 | tour_1004 | CHINA | 1-24 | NaN | 1.0 | 0.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Independent | ... | No | No | No | No | 7.0 | 4.0 | Cash | Yes | No comments | 1657500.0 |
| 5 | tour_1005 | UNITED KINGDOM | 25-44 | NaN | 0.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Package Tour | ... | No | Yes | Yes | No | 9.0 | 3.0 | Cash | Yes | Wildlife | 120950.0 |
| 6 | tour_1007 | SOUTH AFRICA | 45-64 | Alone | 0.0 | 1.0 | Business | Mountain climbing | Friends, relatives | Independent | ... | No | No | No | No | 9.0 | 0.0 | Cash | Yes | Friendly People | 466140.0 |
| 7 | tour_1008 | UNITED STATES OF AMERICA | 45-64 | Friends/Relatives | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Package Tour | ... | Yes | Yes | Yes | Yes | 10.0 | 3.0 | Cash | Yes | Friendly People | 3480750.0 |
| 8 | tour_101 | NIGERIA | 25-44 | Alone | 0.0 | 1.0 | Leisure and Holidays | Cultural tourism | Travel, agent, tour operator | Independent | ... | No | No | No | No | 4.0 | 0.0 | Cash | Yes | NaN | 994500.0 |
| 9 | tour_1011 | INDIA | 25-44 | Alone | 1.0 | 0.0 | Business | Wildlife tourism | Travel, agent, tour operator | Independent | ... | No | No | No | No | 5.0 | 0.0 | Credit Card | Yes | Friendly People | 2486250.0 |
| 10 | tour_1012 | BRAZIL | 25-44 | Spouse | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Radio, TV, Web | Independent | ... | No | No | No | No | 17.0 | 3.0 | Cash | Yes | Wonderful Country, Landscape, Nature | 1117155.0 |
| 11 | tour_1013 | CANADA | 45-64 | Children | 2.0 | 0.0 | Leisure and Holidays | Beach tourism | Friends, relatives | Independent | ... | No | No | No | No | 30.0 | 0.0 | Cash | No | Excellent Experience | 8121750.0 |
| 12 | tour_1016 | CANADA | 45-64 | Children | 0.0 | 2.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Independent | ... | No | No | No | No | 11.0 | 3.0 | Cash | Yes | No comments | 331500.0 |
| 13 | tour_1017 | MALT | 25-44 | Friends/Relatives | 2.0 | 0.0 | Leisure and Holidays | Wildlife tourism | Friends, relatives | Package Tour | ... | Yes | No | No | No | 10.0 | 0.0 | Cash | Yes | No comments | 11346650.0 |
| 14 | tour_1018 | MOZAMBIQUE | 25-44 | Alone | 0.0 | 1.0 | Visiting Friends and Relatives | Beach tourism | Friends, relatives | Independent | ... | No | No | No | No | 2.0 | 0.0 | Cash | Yes | Wildlife | 497250.0 |
| 15 | tour_102 | RWANDA | 65+ | Alone | 1.0 | 0.0 | Leisure and Holidays | Beach tourism | Friends, relatives | Independent | ... | No | No | No | No | 0.0 | 2.0 | Cash | Yes | Wonderful Country, Landscape, Nature | 331500.0 |
| 16 | tour_1021 | AUSTRIA | 45-64 | Friends/Relatives | 4.0 | 1.0 | Visiting Friends and Relatives | Mountain climbing | Friends, relatives | Independent | ... | No | No | No | No | 24.0 | 0.0 | Cash | No | Friendly People | 2000000.0 |
| 17 | tour_1022 | MYANMAR | 25-44 | NaN | 1.0 | 0.0 | Meetings and Conference | Wildlife tourism | Radio, TV, Web | Independent | ... | No | No | No | No | 5.0 | 0.0 | Cash | Yes | Friendly People | 331500.0 |
| 18 | tour_1024 | GERMANY | 25-44 | Children | 1.0 | 1.0 | Visiting Friends and Relatives | Cultural tourism | Friends, relatives | Independent | ... | No | No | No | No | 3.0 | 0.0 | Cash | Yes | Friendly People | 2269330.0 |
| 19 | tour_1026 | KENYA | 25-44 | NaN | 1.0 | 0.0 | Business | Mountain climbing | Friends, relatives | Independent | ... | No | No | No | No | 4.0 | 0.0 | Cash | No | Friendly People | 377520.0 |
20 rows × 23 columns
Tz.describe()
| total_female | total_male | night_mainland | night_zanzibar | total_cost | |
|---|---|---|---|---|---|
| count | 4806.000000 | 4804.000000 | 4809.000000 | 4809.000000 | 4.809000e+03 |
| mean | 0.926758 | 1.009575 | 8.488043 | 2.304429 | 8.114389e+06 |
| std | 1.288242 | 1.138865 | 10.427624 | 4.227080 | 1.222490e+07 |
| min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 4.900000e+04 |
| 25% | 0.000000 | 1.000000 | 3.000000 | 0.000000 | 8.121750e+05 |
| 50% | 1.000000 | 1.000000 | 6.000000 | 0.000000 | 3.397875e+06 |
| 75% | 1.000000 | 1.000000 | 11.000000 | 4.000000 | 9.945000e+06 |
| max | 49.000000 | 44.000000 | 145.000000 | 61.000000 | 9.953288e+07 |
Tz.isnull().sum()
ID 0 country 0 age_group 0 travel_with 1114 total_female 3 total_male 5 purpose 0 main_activity 0 info_source 0 tour_arrangement 0 package_transport_int 0 package_accomodation 0 package_food 0 package_transport_tz 0 package_sightseeing 0 package_guided_tour 0 package_insurance 0 night_mainland 0 night_zanzibar 0 payment_mode 0 first_trip_tz 0 most_impressing 313 total_cost 0 dtype: int64
Tz['travel_with'].fillna(value='Alone', inplace=True)
Tz['most_impressing'].fillna(value='friendly people', inplace=True)
Tz['total_male'].fillna(method='bfill', inplace=True)
Tz['total_female'].fillna(method='bfill', inplace=True)
Tz.isnull().sum()
ID 0 country 0 age_group 0 travel_with 0 total_female 0 total_male 0 purpose 0 main_activity 0 info_source 0 tour_arrangement 0 package_transport_int 0 package_accomodation 0 package_food 0 package_transport_tz 0 package_sightseeing 0 package_guided_tour 0 package_insurance 0 night_mainland 0 night_zanzibar 0 payment_mode 0 first_trip_tz 0 most_impressing 0 total_cost 0 dtype: int64
Tz.shape
(4809, 23)
Tz.drop_duplicates()
| ID | country | age_group | travel_with | total_female | total_male | purpose | main_activity | info_source | tour_arrangement | ... | package_transport_tz | package_sightseeing | package_guided_tour | package_insurance | night_mainland | night_zanzibar | payment_mode | first_trip_tz | most_impressing | total_cost | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | tour_0 | SWIZERLAND | 45-64 | Friends/Relatives | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Friends, relatives | Independent | ... | No | No | No | No | 13.0 | 0.0 | Cash | No | Friendly People | 674602.5 |
| 1 | tour_10 | UNITED KINGDOM | 25-44 | Alone | 1.0 | 0.0 | Leisure and Holidays | Cultural tourism | others | Independent | ... | No | No | No | No | 14.0 | 7.0 | Cash | Yes | Wonderful Country, Landscape, Nature | 3214906.5 |
| 2 | tour_1000 | UNITED KINGDOM | 25-44 | Alone | 0.0 | 1.0 | Visiting Friends and Relatives | Cultural tourism | Friends, relatives | Independent | ... | No | No | No | No | 1.0 | 31.0 | Cash | No | Excellent Experience | 3315000.0 |
| 3 | tour_1002 | UNITED KINGDOM | 25-44 | Spouse | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Package Tour | ... | Yes | Yes | Yes | No | 11.0 | 0.0 | Cash | Yes | Friendly People | 7790250.0 |
| 4 | tour_1004 | CHINA | 1-24 | Alone | 1.0 | 0.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Independent | ... | No | No | No | No | 7.0 | 4.0 | Cash | Yes | No comments | 1657500.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4804 | tour_993 | UAE | 45-64 | Alone | 0.0 | 1.0 | Business | Hunting tourism | Friends, relatives | Independent | ... | No | No | No | No | 2.0 | 0.0 | Credit Card | No | No comments | 3315000.0 |
| 4805 | tour_994 | UNITED STATES OF AMERICA | 25-44 | Spouse | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Package Tour | ... | Yes | Yes | Yes | Yes | 11.0 | 0.0 | Cash | Yes | Friendly People | 10690875.0 |
| 4806 | tour_995 | NETHERLANDS | 1-24 | Alone | 1.0 | 0.0 | Leisure and Holidays | Wildlife tourism | others | Independent | ... | No | No | No | No | 3.0 | 7.0 | Cash | Yes | Good service | 2246636.7 |
| 4807 | tour_997 | SOUTH AFRICA | 25-44 | Friends/Relatives | 1.0 | 1.0 | Business | Beach tourism | Travel, agent, tour operator | Independent | ... | No | No | No | No | 5.0 | 0.0 | Credit Card | No | Friendly People | 1160250.0 |
| 4808 | tour_999 | UNITED KINGDOM | 25-44 | Spouse | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Package Tour | ... | Yes | Yes | Yes | No | 4.0 | 7.0 | Cash | Yes | Friendly People | 13260000.0 |
4809 rows × 23 columns
Tz.shape
(4809, 23)
Tz.describe()
| total_female | total_male | night_mainland | night_zanzibar | total_cost | |
|---|---|---|---|---|---|
| count | 4809.000000 | 4809.000000 | 4809.000000 | 4809.000000 | 4.809000e+03 |
| mean | 0.927012 | 1.009773 | 8.488043 | 2.304429 | 8.114389e+06 |
| std | 1.288257 | 1.139276 | 10.427624 | 4.227080 | 1.222490e+07 |
| min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 4.900000e+04 |
| 25% | 0.000000 | 1.000000 | 3.000000 | 0.000000 | 8.121750e+05 |
| 50% | 1.000000 | 1.000000 | 6.000000 | 0.000000 | 3.397875e+06 |
| 75% | 1.000000 | 1.000000 | 11.000000 | 4.000000 | 9.945000e+06 |
| max | 49.000000 | 44.000000 | 145.000000 | 61.000000 | 9.953288e+07 |
Tz['country'].value_counts()
UNITED STATES OF AMERICA 695
UNITED KINGDOM 533
ITALY 393
FRANCE 280
ZIMBABWE 274
...
NIGER 1
MYANMAR 1
DOMINICA 1
TUNISIA 1
ESTONIA 1
Name: country, Length: 105, dtype: int64
Tz['payment_mode'].value_counts()
Cash 4172 Credit Card 622 Other 8 Travellers Cheque 7 Name: payment_mode, dtype: int64
Tz['travel_with'].value_counts()
Alone 2379 Spouse 1005 Friends/Relatives 895 Spouse and Children 368 Children 162 Name: travel_with, dtype: int64
Tz['purpose'].value_counts()
Leisure and Holidays 2840 Business 671 Visiting Friends and Relatives 633 Meetings and Conference 312 Volunteering 138 Other 128 Scientific and Academic 87 Name: purpose, dtype: int64
Tz['most_impressing'].value_counts()
Friendly People 1541 Wildlife 1038 No comments 743 Wonderful Country, Landscape, Nature 507 Good service 365 friendly people 313 Excellent Experience 271 Satisfies and Hope Come Back 31 Name: most_impressing, dtype: int64
sns.pairplot(Tz)
<seaborn.axisgrid.PairGrid at 0x25b2bb913d0>
pip install pandas-profiling
Requirement already satisfied: pandas-profiling in c:\users\latitude e7470\anaconda3\lib\site-packages (3.3.0) Requirement already satisfied: visions[type_image_path]==0.7.5 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (0.7.5) Requirement already satisfied: scipy<1.10,>=1.4.1 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (1.7.3) Requirement already satisfied: jinja2<3.2,>=2.11.1 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (2.11.3) Requirement already satisfied: phik<0.13,>=0.11.1 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (0.12.2) Requirement already satisfied: tangled-up-in-unicode==0.2.0 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (0.2.0) Requirement already satisfied: requests<2.29,>=2.24.0 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (2.27.1) Requirement already satisfied: tqdm<4.65,>=4.48.2 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (4.64.0) Requirement already satisfied: seaborn<0.12,>=0.10.1 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (0.11.2) Requirement already satisfied: multimethod<1.9,>=1.4 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (1.8) Requirement already satisfied: missingno<0.6,>=0.4.2 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (0.5.1) Requirement already satisfied: PyYAML<6.1,>=5.0.0 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (6.0) Requirement already satisfied: htmlmin==0.1.12 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (0.1.12) Requirement already satisfied: statsmodels<0.14,>=0.13.2 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (0.13.2) Requirement already satisfied: matplotlib<3.6,>=3.2 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (3.5.1) Requirement already satisfied: pandas!=1.4.0,<1.5,>1.1 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (1.4.2) Requirement already satisfied: joblib~=1.1.0 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (1.1.0) Requirement already satisfied: pydantic<1.10,>=1.8.1 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (1.9.2) Requirement already satisfied: numpy<1.24,>=1.16.0 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas-profiling) (1.21.5) Requirement already satisfied: networkx>=2.4 in c:\users\latitude e7470\anaconda3\lib\site-packages (from visions[type_image_path]==0.7.5->pandas-profiling) (2.7.1) Requirement already satisfied: attrs>=19.3.0 in c:\users\latitude e7470\anaconda3\lib\site-packages (from visions[type_image_path]==0.7.5->pandas-profiling) (21.4.0) Requirement already satisfied: imagehash in c:\users\latitude e7470\anaconda3\lib\site-packages (from visions[type_image_path]==0.7.5->pandas-profiling) (4.3.1) Requirement already satisfied: Pillow in c:\users\latitude e7470\anaconda3\lib\site-packages (from visions[type_image_path]==0.7.5->pandas-profiling) (9.0.1) Requirement already satisfied: MarkupSafe>=0.23 in c:\users\latitude e7470\anaconda3\lib\site-packages (from jinja2<3.2,>=2.11.1->pandas-profiling) (2.0.1) Requirement already satisfied: cycler>=0.10 in c:\users\latitude e7470\anaconda3\lib\site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (0.11.0) Requirement already satisfied: fonttools>=4.22.0 in c:\users\latitude e7470\anaconda3\lib\site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (4.25.0) Requirement already satisfied: python-dateutil>=2.7 in c:\users\latitude e7470\anaconda3\lib\site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (2.8.2) Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\latitude e7470\anaconda3\lib\site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (1.3.2) Requirement already satisfied: packaging>=20.0 in c:\users\latitude e7470\anaconda3\lib\site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (21.3) Requirement already satisfied: pyparsing>=2.2.1 in c:\users\latitude e7470\anaconda3\lib\site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (3.0.4) Requirement already satisfied: pytz>=2020.1 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pandas!=1.4.0,<1.5,>1.1->pandas-profiling) (2021.3) Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\users\latitude e7470\anaconda3\lib\site-packages (from pydantic<1.10,>=1.8.1->pandas-profiling) (4.1.1) Requirement already satisfied: six>=1.5 in c:\users\latitude e7470\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib<3.6,>=3.2->pandas-profiling) (1.16.0) Requirement already satisfied: idna<4,>=2.5 in c:\users\latitude e7470\anaconda3\lib\site-packages (from requests<2.29,>=2.24.0->pandas-profiling) (3.3) Requirement already satisfied: certifi>=2017.4.17 in c:\users\latitude e7470\anaconda3\lib\site-packages (from requests<2.29,>=2.24.0->pandas-profiling) (2021.10.8) Requirement already satisfied: charset-normalizer~=2.0.0 in c:\users\latitude e7470\anaconda3\lib\site-packages (from requests<2.29,>=2.24.0->pandas-profiling) (2.0.4) Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\users\latitude e7470\anaconda3\lib\site-packages (from requests<2.29,>=2.24.0->pandas-profiling) (1.26.9) Requirement already satisfied: patsy>=0.5.2 in c:\users\latitude e7470\anaconda3\lib\site-packages (from statsmodels<0.14,>=0.13.2->pandas-profiling) (0.5.2) Requirement already satisfied: colorama in c:\users\latitude e7470\anaconda3\lib\site-packages (from tqdm<4.65,>=4.48.2->pandas-profiling) (0.4.4) Requirement already satisfied: PyWavelets in c:\users\latitude e7470\anaconda3\lib\site-packages (from imagehash->visions[type_image_path]==0.7.5->pandas-profiling) (1.3.0) Note: you may need to restart the kernel to use updated packages.
from pandas_profiling import ProfileReport
profile=ProfileReport(Tz)
profile
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]
sns.catplot(data=Tz, x='payment_mode', kind='count')
<seaborn.axisgrid.FacetGrid at 0x25b33251eb0>
sns.catplot(data=Tz, y='main_activity', kind='count')
<seaborn.axisgrid.FacetGrid at 0x25b405a6520>
country=Tz[['country', 'total_cost']].head(8).groupby('country').sum()
country
| total_cost | |
|---|---|
| country | |
| CHINA | 1657500.0 |
| SOUTH AFRICA | 466140.0 |
| SWIZERLAND | 674602.5 |
| UNITED KINGDOM | 14441106.5 |
| UNITED STATES OF AMERICA | 3480750.0 |
Age=Tz[['age_group', 'total_cost', 'travel_with']]
Age
| age_group | total_cost | travel_with | |
|---|---|---|---|
| 0 | 45-64 | 674602.5 | Friends/Relatives |
| 1 | 25-44 | 3214906.5 | Alone |
| 2 | 25-44 | 3315000.0 | Alone |
| 3 | 25-44 | 7790250.0 | Spouse |
| 4 | 1-24 | 1657500.0 | Alone |
| ... | ... | ... | ... |
| 4804 | 45-64 | 3315000.0 | Alone |
| 4805 | 25-44 | 10690875.0 | Spouse |
| 4806 | 1-24 | 2246636.7 | Alone |
| 4807 | 25-44 | 1160250.0 | Friends/Relatives |
| 4808 | 25-44 | 13260000.0 | Spouse |
4809 rows × 3 columns
Age.groupby('age_group').sum()
| total_cost | |
|---|---|
| age_group | |
| 1-24 | 3.379088e+09 |
| 25-44 | 1.498710e+10 |
| 45-64 | 1.537184e+10 |
| 65+ | 5.284068e+09 |
Age[['age_group']].value_counts()
age_group 25-44 2487 45-64 1391 1-24 624 65+ 307 dtype: int64
Age[['age_group']].value_counts().plot(kind='pie')
<AxesSubplot:ylabel='None'>
Tz[['night_mainland']].mean()
night_mainland 8.488043 dtype: float64
Tz[['night_zanzibar']].mean()
night_zanzibar 2.304429 dtype: float64
sns.distplot(Tz['total_cost']).set(title='SPENDING DISTRIBUTION')
C:\Users\Latitude E7470\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
[Text(0.5, 1.0, 'SPENDING DISTRIBUTION')]
import warnings
warnings.filterwarnings('ignore')
Tz.corr()
| total_female | total_male | night_mainland | night_zanzibar | total_cost | |
|---|---|---|---|---|---|
| total_female | 1.000000 | 0.467000 | 0.031233 | 0.138523 | 0.285862 |
| total_male | 0.467000 | 1.000000 | -0.041369 | 0.050172 | 0.183785 |
| night_mainland | 0.031233 | -0.041369 | 1.000000 | -0.118155 | 0.020473 |
| night_zanzibar | 0.138523 | 0.050172 | -0.118155 | 1.000000 | 0.145139 |
| total_cost | 0.285862 | 0.183785 | 0.020473 | 0.145139 | 1.000000 |
sns.heatmap(Tz.corr(), annot=True)
<AxesSubplot:>
Tz.columns
Index(['ID', 'country', 'age_group', 'travel_with', 'total_female',
'total_male', 'purpose', 'main_activity', 'info_source',
'tour_arrangement', 'package_transport_int', 'package_accomodation',
'package_food', 'package_transport_tz', 'package_sightseeing',
'package_guided_tour', 'package_insurance', 'night_mainland',
'night_zanzibar', 'payment_mode', 'first_trip_tz', 'most_impressing',
'total_cost'],
dtype='object')
#target varieble or prediction variable
x=Tz[['total_female',
'total_male','night_mainland',
'night_zanzibar',]]
y=Tz['total_cost']
#train, test, split.
from sklearn.model_selection import train_test_split
x_test, x_train, y_test, y_train=train_test_split(x,y,test_size=0.9,random_state=60)
#CREATING AND TRAINING A MODEL
from sklearn.linear_model import LinearRegression
#instantiate model
lm = LinearRegression()
lm.fit(x_train, y_train)
LinearRegression()
print(lm.intercept_)
4462382.634319042
lm.coef_
x_train.columns
Index(['total_female', 'total_male', 'night_mainland', 'night_zanzibar'], dtype='object')
Tz2 = pd.DataFrame(lm.coef_, x_train.columns, columns=['total_spendings'])
Tz2
| total_spendings | |
|---|---|
| total_female | 2.071454e+06 |
| total_male | 7.488670e+05 |
| night_mainland | 3.555253e+04 |
| night_zanzibar | 2.930572e+05 |
#evaluating our model performance on spending behaviour
prediction=lm.predict(x_test)
#predicted spending cost of tourists/indicating their spending behaviour
prediction
array([ 5317907.18511913, 5602327.3897838 , 9334103.93746147,
5708984.96653305, 5389012.23628529, 5957852.64561464,
5246802.13395296, 6676046.37885942, 5424564.76186838,
5246802.13395296, 6102326.68475304, 9582971.61654306,
12636660.34162493, 5282354.65953604, 7389360.82732717,
10458549.12266763, 5460117.28745146, 6410394.06156772,
8778173.26187351, 5353459.71070221, 8458200.53162576,
5424564.76186838, 9582482.18211578, 5246802.13395296,
5531222.33861763, 10799390.14237829, 7496018.40407642,
9334103.93746147, 5682069.47726866, 5744537.49211613,
8988220.09825763, 6137879.21033613, 7780438.60874109,
6960466.58352409, 9103514.71132558, 10600759.22499997,
5246802.13395296, 10316339.0203353 , 13175806.37900278,
7531570.92965951, 5566774.86420071, 11130778.79163185,
9334103.93746147, 6552604.16390006, 6604941.32769325,
5353459.71070221, 7389360.82732717, 6640493.85327634,
21589473.88347938, 13477011.22187756, 5389012.23628529,
8410976.91084743, 5389012.23628529, 6604941.32769325,
9041046.69647811, 5317907.18511913, 5460117.28745146,
10924326.17207324, 9716544.6825567 , 5282354.65953604,
5282354.65953604, 6457851.94058388, 7067124.16027335,
7531570.92965951, 6747151.43002559, 11954345.03367437,
8703799.89359296, 7460465.87849334, 11385504.62434502,
7496018.40407642, 12596064.99654867, 15850674.08726869,
7709333.55757493, 8529305.58279193, 7031571.63469026,
10133533.5729267 , 6953323.36194099, 14173051.59770792,
8846009.9959253 , 6277825.37586239, 9760734.24445848,
7673781.03199184, 10396081.10782016, 5282354.65953604,
6676046.37885942, 7531570.92965951, 5246802.13395296,
5424564.76186838, 5246802.13395296, 9041046.69647811,
5460117.28745146, 20337828.7317722 , 7884480.83751938,
9260734.94948923, 5460117.28745146, 11888282.80200137,
9674129.62303372, 5317907.18511913, 5566774.86420071,
9531919.52070138, 5282354.65953604, 6031221.63358687,
6711598.9044425 , 10799390.14237829, 10890058.71444168,
11903292.93783269, 11982545.59089027, 6676046.37885942,
5460117.28745146, 6889361.53235793, 5282354.65953604,
5282354.65953604, 7166638.51543949, 7546581.06549082,
7531570.92965951, 5708984.96653305, 6711598.9044425 ,
7638228.50640876, 6889361.53235793, 7602675.98082567,
5708984.96653305, 7638228.50640876, 9911091.94868223,
17719081.45029489, 7353808.30174408, 6640493.85327634,
9103025.2768983 , 7351544.36493802, 6640493.85327634,
6889361.53235793, 9929018.99046303, 9103025.2768983 ,
8668247.36800988, 10411091.24365147, 6604941.32769325,
11991672.06163624, 12048771.3572794 , 7031571.63469026,
7839638.30647419, 7673781.03199184, 5317907.18511913,
5744537.49211613, 9654076.66770923, 5282354.65953604,
9496856.42954557, 6339289.01040156, 5282354.65953604,
8061590.49629138, 6171167.79911314, 5424564.76186838,
4960117.99248222, 6031221.63358687, 5317907.18511913,
13915546.88230764, 6815641.13322079, 8363284.77359344,
8996857.13457633, 8747989.45549474, 6161526.38248614,
7496018.40407642, 7673781.03199184, 12110234.99181857,
5460117.28745146, 7709333.55757493, 7067124.16027335,
12098493.17310163, 8141332.58377624, 7493754.46727035,
5353459.71070221, 5389012.23628529, 11567820.63732634,
5424564.76186838, 5922300.12003155, 7262650.29525343,
7325118.3101009 , 6117336.82058436, 8454932.21451138,
6315641.83825155, 5317907.18511913, 11231063.26886847,
7875190.83205727, 7318255.776161 , 9041046.69647811,
6747151.43002559, 13021854.45795351, 8996857.13457633,
9103025.2768983 , 5317907.18511913, 7638228.50640876,
7353808.30174408, 5708984.96653305, 8668247.36800988,
5353459.71070221, 6811113.25960865, 5353459.71070221,
8961304.60899324, 13095574.85709064, 5353459.71070221,
5957852.64561464, 5424564.76186838, 18117653.86445682,
6640493.85327634, 5424564.76186838, 6676046.37885942,
5815642.5432823 , 7262650.29525343, 6640493.85327634,
8280437.90371034, 11231063.26886847, 5282354.65953604,
9334103.93746147, 9135962.45451042, 7460465.87849334,
6640493.85327634, 5531222.33861763, 7531570.92965951,
6102326.68475304, 5246802.13395296, 6782703.95560867,
9733563.57900465, 5424564.76186838, 6330651.97408286,
5353459.71070221, 7031571.63469026, 5637879.91536688,
11226020.4493753 , 7582133.59107391, 10625130.08981509,
6889361.53235793, 6782703.95560867, 13291615.93795175,
7274555.6486865 , 20597622.89543231, 6359831.40015332,
7600412.04401961, 5531222.33861763, 6517051.63831698,
7244886.78818876, 9031920.22573214, 9254361.84997661,
5389012.23628529, 5282354.65953604, 5246802.13395296,
5317907.18511913, 5637879.91536688, 7280439.31377185,
6960466.58352409, 12442113.0754994 , 5353459.71070221,
7280439.31377185, 5424564.76186838, 6640493.85327634,
7638228.50640876, 6711598.9044425 , 6223994.39733361,
7324628.87567363, 10082970.9115123 , 11678561.86532839,
5646516.95168557, 11781625.22525212, 5353459.71070221,
8176885.10935933, 9361019.42672586, 6028957.69678081,
5424564.76186838, 7424913.35291025, 7999122.48144391,
6552604.16390006, 9022513.06734296, 5246802.13395296,
12101597.95549988, 5353459.71070221, 9103025.2768983 ,
10538780.64457977, 7575760.49156128, 9582971.61654306,
6351194.36383463, 9547419.09095997, 5993405.17119772,
5317907.18511913, 6398488.70813465, 15048955.00354363,
8351542.95487651, 8659610.33169119, 7424913.35291025,
6315641.83825155, 9041046.69647811, 7389360.82732717,
5246802.13395296, 5353459.71070221, 5317907.18511913,
5389012.23628529, 15317875.63794971, 10280786.49475221,
7111803.1566024 , 5602327.3897838 , 7600412.04401961,
12953854.18918558, 5424564.76186838, 6604941.32769325,
9334103.93746147, 7496018.40407642, 8747989.45549474,
5282354.65953604, 9183256.79881044, 10568311.48181513,
10861368.72279849, 7496018.40407642, 7111803.1566024 ,
8561589.79126063, 14462351.08714963, 7839638.30647419,
9334103.93746147, 7699926.39918575, 7496018.40407642,
5211249.60836987, 11421057.14992811, 5460117.28745146,
10494591.08267799, 5353459.71070221, 8996857.13457633,
8339637.60144343, 5708984.96653305, 6410394.06156772,
6493404.46616697, 7744886.08315801, 5531222.33861763,
7531570.92965951, 7460465.87849334, 6889361.53235793,
5317907.18511913, 8161874.97352801, 7887096.18549035,
7531570.92965951, 6960466.58352409, 9289914.37555969,
7709333.55757493, 6173431.73591921, 7280439.31377185,
7460465.87849334, 5753174.52843483, 6925403.49236828,
9334103.93746147, 9538782.05464128, 14133271.58677007,
12387792.66254334, 7661875.67855877, 9103514.71132558,
5282354.65953604, 12154424.55372034, 5317907.18511913,
6747151.43002559, 5531222.33861763, 5282354.65953604,
8454932.21451138, 8363284.77359344, 9887444.77653222,
5317907.18511913, 7709333.55757493, 6676046.37885942,
9662224.26960064, 7602675.98082567, 21706894.41009102,
6386746.88941771, 7460465.87849334, 7344401.14335491,
8834104.64249223, 8161874.97352801, 8541047.40150886,
5780090.01769922, 8668247.36800988, 5566774.86420071,
9281277.339241 , 5995669.10800379, 5353459.71070221,
9334103.93746147, 6031221.63358687, 6066774.15916996,
8810457.47034222, 8925752.08341016, 11678561.86532839,
7999122.48144391, 7531570.92965951, 8339637.60144343,
5708984.96653305, 6173431.73591921, 6604941.32769325,
8349279.01807044, 8461305.314024 , 5637879.91536688,
7839638.30647419, 5531222.33861763, 5246802.13395296,
5282354.65953604, 6640493.85327634, 6676046.37885942,
11195510.74328539, 6640493.85327634, 6996019.10910718,
6676046.37885942, 5424564.76186838, 6676046.37885942,
9582971.61654306, 5637879.91536688, 11654425.2587511 ,
6604941.32769325, 7067613.59470062, 5246802.13395296,
7804085.7808911 , 7067124.16027335, 5895384.63076716,
9831839.29562465, 13241053.27653736, 8552952.75494193,
6604941.32769325, 6711598.9044425 , 8454932.21451138,
5939574.19266894, 6853809.00677484, 10194742.03127642,
6782703.95560867, 12952105.19826055, 9745724.10862716,
9567472.04628447, 5246802.13395296, 5637879.91536688,
26903467.72825824, 6552604.16390006, 6676046.37885942,
6031221.63358687, 5939574.19266894, 8176885.10935933,
6569388.80211017, 5460117.28745146, 5424564.76186838,
9582971.61654306, 5246802.13395296, 9334103.93746147,
8588505.28052502, 5317907.18511913, 5708984.96653305,
5575411.90051941, 6782703.95560867, 8286811.00322296,
7780438.60874109, 6711598.9044425 , 8668247.36800988,
11275252.83077025, 6640493.85327634, 11848972.52487663,
7638228.50640876, 6676046.37885942, 6102326.68475304,
7280928.74819913, 9254361.84997661, 8996367.70014905,
5317907.18511913, 7496018.40407642, 5531222.33861763,
10390548.85389971, 19268010.15861906, 5708984.96653305,
7424913.35291025, 5389012.23628529, 15604070.34499317,
8925262.64898288, 7318255.776161 , 5424564.76186838,
5317907.18511913, 8260384.94838585, 8854157.59781672,
8747989.45549474, 8925752.08341016, 5173433.14598072,
8552952.75494193, 9334103.93746147, 7353808.30174408,
7531570.92965951, 5282354.65953604, 6640493.85327634])
#y_test containing the correct spending cost/habit of tourist
y_test
4284 130000.0
3401 8453250.0
3204 7293000.0
3362 497250.0
3822 6132750.0
...
2147 6298500.0
1418 3480750.0
3654 8619000.0
3137 600000.0
2253 497250.0
Name: total_cost, Length: 480, dtype: float64
#comparing y_test to the prediction to find the residuals
plt.scatter(y_test, prediction/10**6)
<matplotlib.collections.PathCollection at 0x25b41197af0>
#plotting histogram histogram distribution of the residuals
sns.distplot(y_test- prediction/10**6)
<AxesSubplot:xlabel='total_cost', ylabel='Density'>
#looking at the linear evaluation metrics we get:
from sklearn import metrics
#average error of the model
metrics.mean_absolute_error(y_test, prediction)
7232385.834148327
#checking for larger error by squaring
metrics.mean_squared_error(y_test, prediction)
121440898468113.7
#RMSE
np.sqrt(metrics.mean_squared_error(y_test, prediction))
11020022.616497377
r_squared=lm.score(x,y)
print(r_squared)
0.09682259179087571
r_squared=lm.score(x_test, prediction)
print(r_squared)
1.0
from sklearn.metrics import r2_score
r2_score(y_test, prediction)
0.17874474458236633
r2_score(y_test, prediction).dtype
dtype('float64')
from sklearn.svm import SVC
tz=pd.read_csv('test .csv')
tz
| ID | country | age_group | travel_with | total_female | total_male | purpose | main_activity | info_source | tour_arrangement | ... | package_food | package_transport_tz | package_sightseeing | package_guided_tour | package_insurance | night_mainland | night_zanzibar | payment_mode | first_trip_tz | most_impressing | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | tour_1 | AUSTRALIA | 45-64 | Spouse | 1.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Travel, agent, tour operator | Package Tour | ... | Yes | Yes | Yes | Yes | Yes | 10 | 3 | Cash | Yes | Wildlife |
| 1 | tour_100 | SOUTH AFRICA | 25-44 | Friends/Relatives | 0.0 | 4.0 | Business | Wildlife tourism | Tanzania Mission Abroad | Package Tour | ... | No | No | No | No | No | 13 | 0 | Cash | No | Wonderful Country, Landscape, Nature |
| 2 | tour_1001 | GERMANY | 25-44 | Friends/Relatives | 3.0 | 0.0 | Leisure and Holidays | Beach tourism | Friends, relatives | Independent | ... | No | No | No | No | No | 7 | 14 | Cash | No | No comments |
| 3 | tour_1006 | CANADA | 24-Jan | Friends/Relatives | 2.0 | 0.0 | Leisure and Holidays | Cultural tourism | others | Independent | ... | No | No | No | No | No | 0 | 4 | Cash | Yes | Friendly People |
| 4 | tour_1009 | UNITED KINGDOM | 45-64 | Friends/Relatives | 2.0 | 2.0 | Leisure and Holidays | Wildlife tourism | Friends, relatives | Package Tour | ... | Yes | Yes | No | No | No | 10 | 0 | Cash | Yes | Friendly People |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1596 | tour_988 | UNITED STATES OF AMERICA | 25-44 | NaN | 0.0 | 1.0 | Meetings and Conference | Mountain climbing | Newspaper, magazines,brochures | Independent | ... | No | No | No | No | No | 1 | 0 | Cash | No | NaN |
| 1597 | tour_990 | ITALY | 45-64 | Spouse and Children | 3.0 | 1.0 | Leisure and Holidays | Wildlife tourism | Friends, relatives | Package Tour | ... | Yes | Yes | Yes | No | No | 10 | 5 | Other | Yes | Wildlife |
| 1598 | tour_992 | FINLAND | 25-44 | Alone | 0.0 | 1.0 | Meetings and Conference | Mountain climbing | Friends, relatives | Independent | ... | No | No | No | No | No | 6 | 0 | Cash | Yes | No comments |
| 1599 | tour_996 | SOUTH AFRICA | 24-Jan | Alone | 0.0 | 1.0 | Business | Beach tourism | Friends, relatives | Independent | ... | No | No | No | No | No | 4 | 0 | Cash | Yes | Wildlife |
| 1600 | tour_998 | SOUTH AFRICA | 25-44 | Spouse | 1.0 | 1.0 | Leisure and Holidays | Cultural tourism | Radio, TV, Web | Independent | ... | No | No | No | No | No | 9 | 5 | Cash | Yes | Friendly People |
1601 rows × 22 columns
tz.corr()
| total_female | total_male | night_mainland | night_zanzibar | |
|---|---|---|---|---|
| total_female | 1.000000 | 0.288933 | 0.015265 | 0.078020 |
| total_male | 0.288933 | 1.000000 | -0.035880 | 0.020622 |
| night_mainland | 0.015265 | -0.035880 | 1.000000 | 0.516262 |
| night_zanzibar | 0.078020 | 0.020622 | 0.516262 | 1.000000 |
sns.heatmap(tz.corr(), annot=True)
<AxesSubplot:>
lm.fit(x,y)
LinearRegression()
y_pred=lm.predict(x_test)
y_pred
array([ 5082921.44689461, 5356361.05250345, 9505993.91042198,
5458900.90460677, 5151281.34829682, 5698160.5595145 ,
5014561.5454924 , 6635229.06918204, 5185461.29899793,
5014561.5454924 , 5851962.15137205, 9745253.56532971,
12918546.98376997, 5048741.49619351, 7335909.87225727,
10563018.706304 , 5219641.24969903, 6261982.81835897,
8686009.7386535 , 5117101.39759572, 8378390.18234355,
5185461.29899793, 9776798.26095758, 5014561.5454924 ,
5288001.15110124, 11129296.68918485, 7438449.72436058,
9505993.91042198, 5475941.9040494 , 5493080.85530787,
9044711.50368083, 5886142.10207316, 7711889.32996942,
6908668.67479088, 9198472.30592788, 10699738.50910842,
5014561.5454924 , 10426298.90349958, 13604544.55962311,
7472629.67506169, 5322181.10180235, 11366059.8304457 ,
9505993.91042198, 6398702.62116338, 6566869.16777983,
5117101.39759572, 7335909.87225727, 6601049.11848093,
22294929.97428396, 14011970.76114729, 5151281.34829682,
8056577.15789075, 5151281.34829682, 6566869.16777983,
9181333.3546694 , 5082921.44689461, 5219641.24969903,
11163574.5917018 , 9830752.4179904 , 5048741.49619351,
5048741.49619351, 6193761.6583831 , 7011208.52689419,
7472629.67506169, 6703588.97058425, 12325497.01190768,
8771271.89807199, 7404269.77365948, 11778617.80069 ,
7438449.72436058, 12989362.60920857, 16501681.53816029,
7643529.42856721, 8446750.08374576, 6977028.57619309,
10360394.72613376, 6655199.17914544, 14610209.66393506,
8907991.70087641, 6005780.11582445, 9916153.31883524,
7609349.47786611, 10545879.75504552, 5048741.49619351,
6635229.06918204, 7472629.67506169, 5014561.5454924 ,
5185461.29899793, 5014561.5454924 , 9181333.3546694 ,
5219641.24969903, 21186545.48683186, 7595123.26453357,
9420552.21996664, 5219641.24969903, 12152141.53436577,
9793978.00182656, 5082921.44689461, 5322181.10180235,
9657258.19902214, 5048741.49619351, 5783602.24996984,
6669409.01988314, 11129296.68918485, 11209565.82130956,
12237501.64560011, 12388627.19277392, 6635229.06918204,
5219641.24969903, 6840308.77338867, 5048741.49619351,
5048741.49619351, 6860278.88335207, 7557989.78629603,
7472629.67506169, 5458900.90460677, 6669409.01988314,
7575169.527165 , 6840308.77338867, 7540989.5764639 ,
5458900.90460677, 7575169.527165 , 10135638.76741126,
18164793.83984445, 7301729.92155616, 6601049.11848093,
9230017.00155574, 7284648.13250303, 6601049.11848093,
6840308.77338867, 9865152.68933886, 9230017.00155574,
8737091.94737088, 10631239.86627986, 6566869.16777983,
12408303.44728978, 12220599.38758383, 6977028.57619309,
7882650.3420486 , 7609349.47786611, 5082921.44689461,
5493080.85530787, 9813613.46673192, 5048741.49619351,
9591533.55269317, 6193622.91695676, 5048741.49619351,
8138950.99639896, 5903240.26372113, 5185461.29899793,
4724040.15083043, 5783602.24996984, 5082921.44689461,
14319729.05888359, 6552642.95444729, 8514832.50229527,
9095932.45382456, 8856672.79891683, 6022723.16345123,
7438449.72436058, 7609349.47786611, 12408442.18871613,
5219641.24969903, 7643529.42856721, 7011208.52689419,
12169460.0166611 , 8258531.84794491, 7421367.93530745,
5117101.39759572, 5151281.34829682, 11876066.67368369,
5185461.29899793, 5663980.6088134 , 7253005.48505932,
7270144.43631779, 5937322.26260639, 8532012.24316425,
6057041.85557868, 5082921.44689461, 11434460.52145841,
7916830.29274971, 7267549.97085506, 9181333.3546694 ,
6703588.97058425, 13228842.58476366, 9095932.45382456,
9230017.00155574, 5082921.44689461, 7575169.527165 ,
7301729.92155616, 5458900.90460677, 8737091.94737088,
5117101.39759572, 6518479.37634102, 5117101.39759572,
9061752.50312346, 13516508.40370504, 5117101.39759572,
5698160.5595145 , 5185461.29899793, 18996466.92168808,
6601049.11848093, 5185461.29899793, 6635229.06918204,
5561440.75671008, 7253005.48505932, 6601049.11848093,
8207490.42883803, 11434460.52145841, 5048741.49619351,
9505993.91042198, 9044891.03471768, 7404269.77365948,
6601049.11848093, 5288001.15110124, 7472629.67506169,
5851962.15137205, 5014561.5454924 , 6737768.92128535,
9574704.82947707, 5185461.29899793, 6142401.96681302,
5117101.39759572, 6977028.57619309, 5390541.00320456,
11539456.09759811, 7592169.73699713, 10414740.69042996,
6840308.77338867, 6737768.92128535, 13619146.2076242 ,
7150604.37438235, 21386493.42231836, 6142442.75642352,
7523907.78741077, 5288001.15110124, 6364522.67046228,
7182108.28039972, 9161657.10015353, 9386413.05887603,
5151281.34829682, 5048741.49619351, 5014561.5454924 ,
5082921.44689461, 5390541.00320456, 7216288.23110082,
6908668.67479088, 12613660.63434911, 5117101.39759572,
7216288.23110082, 5185461.29899793, 6601049.11848093,
7575169.527165 , 6669409.01988314, 6039862.11470971,
7301689.13194566, 10240854.66419831, 12103278.35644257,
5441761.95334829, 12049601.68226245, 5117101.39759572,
8292711.79864601, 9488952.91097935, 5766520.46091671,
5185461.29899793, 7370089.82295837, 8121812.04514049,
6398702.62116338, 8891105.81545497, 5014561.5454924 ,
12357221.2385724 , 5117101.39759572, 9230017.00155574,
10651054.86222208, 7558030.57590653, 9745253.56532971,
6091221.80627979, 9711073.61462861, 5732340.51021561,
5082921.44689461, 6364383.92903593, 15737813.39201348,
8275850.33024024, 8685870.99722715, 7370089.82295837,
6057041.85557868, 9181333.3546694 , 7335909.87225727,
5014561.5454924 , 5117101.39759572, 5082921.44689461,
5151281.34829682, 15957437.58201585, 10392118.95279847,
7065064.73211116, 5356361.05250345, 7523907.78741077,
13348243.90527275, 5185461.29899793, 6566869.16777983,
9505993.91042198, 7438449.72436058, 8856672.79891683,
5048741.49619351, 9318053.15747382, 10853319.78031862,
11177980.33607119, 7438449.72436058, 7065064.73211116,
8634552.09526757, 15120036.9761362 , 7882650.3420486 ,
9505993.91042198, 7372978.14386865, 7438449.72436058,
4980381.5947913 , 11812797.75139111, 5219641.24969903,
10565653.96137724, 5117101.39759572, 9095932.45382456,
8378251.4409172 , 5458900.90460677, 6261982.81835897,
6227941.60908421, 7677709.37926832, 5288001.15110124,
7472629.67506169, 7404269.77365948, 6840308.77338867,
5082921.44689461, 8207351.68741168, 7814429.18207274,
7472629.67506169, 6908668.67479088, 9420593.00957714,
7643529.42856721, 5920322.05277426, 7216288.23110082,
7404269.77365948, 5544301.80545161, 6842944.02846191,
9505993.91042198, 9659852.66448487, 14959319.18084992,
12679287.32886224, 7711750.58854308, 9198472.30592788,
5048741.49619351, 12493843.08956097, 5082921.44689461,
6703588.97058425, 5288001.15110124, 5048741.49619351,
8532012.24316425, 8514832.50229527, 9999057.70603319,
5082921.44689461, 7643529.42856721, 6635229.06918204,
9896379.11250352, 7540989.5764639 , 22699541.38969805,
6125401.75698089, 7404269.77365948, 7031178.6368576 ,
9010392.81155337, 8207351.68741168, 8685732.2558008 ,
5527260.80600898, 8737091.94737088, 5322181.10180235,
9369372.0594334 , 5749422.29926874, 5117101.39759572,
9505993.91042198, 5783602.24996984, 5817782.20067095,
8873811.7501753 , 9027572.55242235, 12103278.35644257,
8121812.04514049, 7472629.67506169, 8378251.4409172 ,
5458900.90460677, 5920322.05277426, 6566869.16777983,
8258768.54118711, 8566151.40425485, 5390541.00320456,
7882650.3420486 , 5288001.15110124, 5014561.5454924 ,
5048741.49619351, 6601049.11848093, 6635229.06918204,
11400280.5707573 , 6601049.11848093, 6942848.62549198,
6635229.06918204, 5185461.29899793, 6635229.06918204,
9745253.56532971, 5390541.00320456, 11998241.99069237,
6566869.16777983, 6979663.83126632, 5014561.5454924 ,
7848470.39134749, 7011208.52689419, 5681021.60825603,
9984513.22023745, 13499606.14568876, 8583331.14512383,
6566869.16777983, 6669409.01988314, 8532012.24316425,
5766422.50910087, 6806128.82268756, 10189748.03846527,
6737768.92128535, 13192002.96197366, 9830793.2076009 ,
9691438.14972324, 5014561.5454924 , 5390541.00320456,
28504021.72200732, 6398702.62116338, 6635229.06918204,
5783602.24996984, 5766422.50910087, 8292711.79864601,
6532689.21707872, 5219641.24969903, 5185461.29899793,
9745253.56532971, 5014561.5454924 , 9505993.91042198,
8617511.09582494, 5082921.44689461, 5458900.90460677,
5373402.05194608, 6737768.92128535, 8241629.58992863,
7711889.32996942, 6669409.01988314, 8737091.94737088,
11519861.42230325, 6601049.11848093, 12303128.34011323,
7575169.527165 , 6635229.06918204, 5851962.15137205,
7184743.53547296, 9386413.05887603, 9127477.14945243,
5082921.44689461, 7438449.72436058, 5288001.15110124,
10682420.02681309, 20207154.5680013 , 5458900.90460677,
7370089.82295837, 5151281.34829682, 16279503.67230569,
9059117.24805022, 7267549.97085506, 5185461.29899793,
5082921.44689461, 8227125.89374339, 8990757.34664801,
8856672.79891683, 9027572.55242235, 4929119.85503706,
8583331.14512383, 9505993.91042198, 7301729.92155616,
7472629.67506169, 5048741.49619351, 6601049.11848093])
#evaluate accuracy
from sklearn.metrics import accuracy_score
accuracy=lm.score(x_test, y_pred)
accuracy
1.0
x_test
| total_female | total_male | night_mainland | night_zanzibar | |
|---|---|---|---|---|
| 4284 | 0.0 | 1.0 | 3.0 | 0.0 |
| 3401 | 0.0 | 1.0 | 11.0 | 0.0 |
| 3204 | 1.0 | 1.0 | 0.0 | 7.0 |
| 3362 | 0.0 | 1.0 | 14.0 | 0.0 |
| 3822 | 0.0 | 1.0 | 5.0 | 0.0 |
| ... | ... | ... | ... | ... |
| 2147 | 1.0 | 1.0 | 0.0 | 7.0 |
| 1418 | 1.0 | 1.0 | 2.0 | 0.0 |
| 3654 | 1.0 | 1.0 | 7.0 | 0.0 |
| 3137 | 0.0 | 1.0 | 2.0 | 0.0 |
| 2253 | 1.0 | 0.0 | 3.0 | 0.0 |
480 rows × 4 columns